{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random Forest for Titanic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "np.random.seed(10)\n",
    "\n",
    "import pandas as pd\n",
    "import re\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.decomposition import PCA\n",
    "from decisiontree import *\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_csv(\"datasets/titanic/titanic_training.csv\")\n",
    "test = pd.read_csv(\"datasets/titanic/titanic_testing_data.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "train.fillna(train.mean(),inplace=True)\n",
    "test.fillna(train.mean(),inplace=True)\n",
    "\n",
    "mode_sex = list(train[\"sex\"])\n",
    "mode_ticket = list(train[\"ticket\"])\n",
    "mode_embarked = list(train[\"embarked\"])\n",
    "reps = {\"sex\":max(set(mode_sex),key=mode_sex.count),\n",
    "        \"ticket\":max(set(mode_ticket),key=mode_ticket.count),\n",
    "        \"embarked\":max(set(mode_embarked),key=mode_embarked.count)}\n",
    "\n",
    "train.fillna(reps,inplace=True)\n",
    "test.fillna(reps,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#one hot cabin training set\n",
    "cabin = train[[\"cabin\"]].copy()\n",
    "cabin[\"cabin\"] = cabin[\"cabin\"].str.slice(0,1)\n",
    "mode_cabin = list(cabin[\"cabin\"])\n",
    "cabin[\"cabin\"] = cabin[\"cabin\"].fillna(\n",
    "                max(set(mode_cabin),\n",
    "                key=mode_cabin.count))\n",
    "one_hot = pd.get_dummies(cabin[\"cabin\"])\n",
    "train = pd.concat([train, cabin], axis=1, sort=False)\n",
    "train = train.loc[:,~train.columns.duplicated()] #removed extra T col\n",
    "\n",
    "#one hot cabin test set\n",
    "cabin = test[[\"cabin\"]].copy()\n",
    "cabin[\"cabin\"] = cabin[\"cabin\"].str.slice(0,1)\n",
    "mode_cabin = list(cabin[\"cabin\"])\n",
    "cabin[\"cabin\"] = cabin[\"cabin\"].fillna(\n",
    "                max(set(mode_cabin),\n",
    "                key=mode_cabin.count))\n",
    "one_hot = pd.get_dummies(cabin[\"cabin\"])\n",
    "# test.drop(columns=[\"cabin\"],axis=1,inplace=True)\n",
    "test = pd.concat([test, cabin], axis=1, sort=False)\n",
    "test = test.loc[:,~train.columns.duplicated()] #removed extra T col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#one hot sex training set\n",
    "train = pd.concat([train,\n",
    "                   pd.get_dummies(train[\"sex\"], \n",
    "                      prefix=\"sex\")],axis=1)\n",
    "train.drop([\"sex\"], axis=1,inplace=True)\n",
    "\n",
    "#one hot sex test set\n",
    "test = pd.concat([test,\n",
    "                  pd.get_dummies(test[\"sex\"], \n",
    "                     prefix=\"sex\")],axis=1)\n",
    "test.drop([\"sex\"], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#one hot embarked training set\n",
    "train = pd.concat([train,\n",
    "                   pd.get_dummies(train[\"embarked\"],\n",
    "                        prefix=\"embarked\")],axis=1)\n",
    "train.drop([\"embarked\"], axis=1, inplace=True)\n",
    "\n",
    "#one hot embarked test set\n",
    "test = pd.concat([test,\n",
    "                  pd.get_dummies(test[\"embarked\"], \n",
    "                         prefix=\"embarked\")],axis=1)\n",
    "test.drop([\"embarked\"], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getnum(s):\n",
    "    s = re.findall(r\"\\d+\",str(s))\n",
    "    return int(s[0]) if len(s)>=1 else -1\n",
    "\n",
    "train[\"ticket\"] = train[\"ticket\"].apply(lambda s: getnum(s))\n",
    "test[\"ticket\"] = test[\"ticket\"].apply(lambda s: getnum(s))\n",
    "train[\"ticket\"] = train[\"ticket\"].apply(lambda s: train[\"ticket\"].mean() if s==-1 else s)\n",
    "test[\"ticket\"] = test[\"ticket\"].apply(lambda s: test[\"ticket\"].mean() if s==-1 else s)\n",
    "\n",
    "train_y = train[\"survived\"].values\n",
    "train.drop([\"survived\"], axis=1, inplace=True)\n",
    "\n",
    "train.drop(columns=[\"cabin\"],axis=1,inplace=True)\n",
    "test.drop(columns=[\"cabin\"],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((800, 11), (310, 11), numpy.ndarray, pandas.core.frame.DataFrame)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = train.values\n",
    "x_test = test.values\n",
    "x_train, x_val, y_train, y_val = \\\n",
    "train_test_split(train, train_y, test_size=0.2)\n",
    "x_train.shape,test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2019-03-27 19:00:46,698\tWARNING worker.py:1406 -- WARNING: Not updating worker name since `setproctitle` is not installed. Install this with `pip install setproctitle` (or ray[debug]) to enable monitoring of worker processes.\n",
      "2019-03-27 19:00:46,700\tINFO node.py:423 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-27_19-00-46_61611/logs.\n",
      "2019-03-27 19:00:46,810\tINFO services.py:363 -- Waiting for redis server at 127.0.0.1:25264 to respond...\n",
      "2019-03-27 19:00:46,927\tINFO services.py:363 -- Waiting for redis server at 127.0.0.1:23534 to respond...\n",
      "2019-03-27 19:00:46,933\tINFO services.py:760 -- Starting Redis shard with 3.44 GB max memory.\n",
      "2019-03-27 19:00:46,948\tINFO services.py:1384 -- Starting the Plasma object store with 5.15 GB memory using /tmp.\n",
      "Exception in thread ray_import_thread:\n",
      "Traceback (most recent call last):\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 185, in _read_from_socket\n",
      "    raise socket.error(SERVER_CLOSED_CONNECTION_ERROR)\n",
      "OSError: Connection closed by server.\n",
      "\n",
      "During handling of the above exception, another exception occurred:\n",
      "\n",
      "Traceback (most recent call last):\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/threading.py\", line 916, in _bootstrap_inner\n",
      "    self.run()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/threading.py\", line 864, in run\n",
      "    self._target(*self._args, **self._kwargs)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/import_thread.py\", line 70, in _run\n",
      "    msg = import_pubsub_client.get_message()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3135, in get_message\n",
      "    response = self.parse_response(block=False, timeout=timeout)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3036, in parse_response\n",
      "    return self._execute(connection, connection.read_response)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3013, in _execute\n",
      "    return command(*args)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 637, in read_response\n",
      "    response = self._parser.read_response()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 290, in read_response\n",
      "    response = self._buffer.readline()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 224, in readline\n",
      "    self._read_from_socket()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 199, in _read_from_socket\n",
      "    (e.args,))\n",
      "redis.exceptions.ConnectionError: Error while reading from socket: ('Connection closed by server.',)\n",
      "\n",
      "Exception in thread ray_listen_error_messages:\n",
      "Traceback (most recent call last):\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 185, in _read_from_socket\n",
      "    raise socket.error(SERVER_CLOSED_CONNECTION_ERROR)\n",
      "OSError: Connection closed by server.\n",
      "\n",
      "During handling of the above exception, another exception occurred:\n",
      "\n",
      "Traceback (most recent call last):\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/threading.py\", line 916, in _bootstrap_inner\n",
      "    self.run()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/threading.py\", line 864, in run\n",
      "    self._target(*self._args, **self._kwargs)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\", line 1758, in listen_error_messages_raylet\n",
      "    msg = worker.error_message_pubsub_client.get_message()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3135, in get_message\n",
      "    response = self.parse_response(block=False, timeout=timeout)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3036, in parse_response\n",
      "    return self._execute(connection, connection.read_response)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3013, in _execute\n",
      "    return command(*args)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 637, in read_response\n",
      "    response = self._parser.read_response()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 290, in read_response\n",
      "    response = self._buffer.readline()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 224, in readline\n",
      "    self._read_from_socket()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 199, in _read_from_socket\n",
      "    (e.args,))\n",
      "redis.exceptions.ConnectionError: Error while reading from socket: ('Connection closed by server.',)\n",
      "\n",
      "Exception in thread ray_print_logs:\n",
      "Traceback (most recent call last):\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 185, in _read_from_socket\n",
      "    raise socket.error(SERVER_CLOSED_CONNECTION_ERROR)\n",
      "OSError: Connection closed by server.\n",
      "\n",
      "During handling of the above exception, another exception occurred:\n",
      "\n",
      "Traceback (most recent call last):\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/threading.py\", line 916, in _bootstrap_inner\n",
      "    self.run()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/threading.py\", line 864, in run\n",
      "    self._target(*self._args, **self._kwargs)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\", line 1655, in print_logs\n",
      "    msg = pubsub_client.get_message()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3135, in get_message\n",
      "    response = self.parse_response(block=False, timeout=timeout)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3036, in parse_response\n",
      "    return self._execute(connection, connection.read_response)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/client.py\", line 3013, in _execute\n",
      "    return command(*args)\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 637, in read_response\n",
      "    response = self._parser.read_response()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 290, in read_response\n",
      "    response = self._buffer.readline()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 224, in readline\n",
      "    self._read_from_socket()\n",
      "  File \"/anaconda3/envs/cs189/lib/python3.6/site-packages/redis/connection.py\", line 199, in _read_from_socket\n",
      "    (e.args,))\n",
      "redis.exceptions.ConnectionError: Error while reading from socket: ('Connection closed by server.',)\n",
      "\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<timed exec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n",
      "\u001b[0;32m~/Documents/GitHub/cs189/hw5/HW5_codes/Q2_decision_tree/decisiontree.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y)\u001b[0m\n\u001b[1;32m    189\u001b[0m         \u001b[0mTODO\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfit\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0mto\u001b[0m \u001b[0ma\u001b[0m \u001b[0mtraining\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    190\u001b[0m         \"\"\"\n\u001b[0;32m--> 191\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrained_trees\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparallel_grow_tree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_trees\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    192\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    193\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(object_ids)\u001b[0m\n\u001b[1;32m   2301\u001b[0m         \u001b[0;32mglobal\u001b[0m \u001b[0mlast_task_error_raise_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2302\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2303\u001b[0;31m             \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mworker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject_ids\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2304\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2305\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRayError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\u001b[0m in \u001b[0;36mget_object\u001b[0;34m(self, object_ids)\u001b[0m\n\u001b[1;32m    535\u001b[0m                     max([\n\u001b[1;32m    536\u001b[0m                         \u001b[0mray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_config\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_timeout_milliseconds\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 537\u001b[0;31m                         \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.01\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0munready_ids\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    538\u001b[0m                     ]),\n\u001b[1;32m    539\u001b[0m                 )\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\u001b[0m in \u001b[0;36mretrieve_and_deserialize\u001b[0;34m(self, object_ids, timeout, error_timeout)\u001b[0m\n\u001b[1;32m    409\u001b[0m                         \u001b[0mobject_ids\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mi\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    410\u001b[0m                         \u001b[0mtimeout\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m                         \u001b[0mwith_meta\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    412\u001b[0m                     )\n\u001b[1;32m    413\u001b[0m                     \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmetadata_data_pairs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/utils.py\u001b[0m in \u001b[0;36m_wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    473\u001b[0m                 \u001b[0;32mdef\u001b[0m \u001b[0m_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    474\u001b[0m                     \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 475\u001b[0;31m                         \u001b[0;32mreturn\u001b[0m \u001b[0morig_attr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    476\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    477\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wrapper_cache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_wrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/_plasma.pyx\u001b[0m in \u001b[0;36mpyarrow._plasma.PlasmaClient.get_buffers\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/_plasma.pyx\u001b[0m in \u001b[0;36mpyarrow._plasma.PlasmaClient._get_object_buffers\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.check_status\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/compat.py\u001b[0m in \u001b[0;36mfrombytes\u001b[0;34m(o)\u001b[0m\n\u001b[1;32m    140\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    141\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m     \u001b[0;32mdef\u001b[0m \u001b[0mfrombytes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mo\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    143\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'utf8'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "%%time\n",
    "rf = RandomForest(rand_features=\"log\",num_trees=20,header=\"titanic_randomforest\")\n",
    "rf.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "ename": "ArrowIOError",
     "evalue": "Broken pipe",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mArrowIOError\u001b[0m                              Traceback (most recent call last)",
      "\u001b[0;32m<timed exec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n",
      "\u001b[0;32m~/Documents/GitHub/cs189/hw5/HW5_codes/Q2_decision_tree/decisiontree.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, mode)\u001b[0m\n\u001b[1;32m    200\u001b[0m         \"\"\"\n\u001b[1;32m    201\u001b[0m         \u001b[0;31m# print(\"confirming tree properties\",len(self.trained_trees),type(self.trained_trees[0]))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 202\u001b[0;31m         \u001b[0mall_preds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparallel_predict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrained_trees\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_trees\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    203\u001b[0m         \u001b[0mfinal_preds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_along_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mCounter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmost_common\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mall_preds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    204\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"test\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Documents/GitHub/cs189/hw5/HW5_codes/Q2_decision_tree/decisiontree.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    200\u001b[0m         \"\"\"\n\u001b[1;32m    201\u001b[0m         \u001b[0;31m# print(\"confirming tree properties\",len(self.trained_trees),type(self.trained_trees[0]))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 202\u001b[0;31m         \u001b[0mall_preds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparallel_predict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrained_trees\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_trees\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    203\u001b[0m         \u001b[0mfinal_preds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_along_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mCounter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmost_common\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mall_preds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    204\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"test\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/remote_function.py\u001b[0m in \u001b[0;36mremote\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m     71\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mremote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     72\u001b[0m         \u001b[0;34m\"\"\"This runs immediately when a remote function is called.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_remote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     75\u001b[0m     def _submit(self,\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/remote_function.py\u001b[0m in \u001b[0;36m_remote\u001b[0;34m(self, args, kwargs, num_return_vals, num_cpus, num_gpus, resources)\u001b[0m\n\u001b[1;32m    129\u001b[0m             \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    130\u001b[0m             \u001b[0mnum_return_vals\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_return_vals\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 131\u001b[0;31m             resources=resources)\n\u001b[0m\u001b[1;32m    132\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject_ids\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    133\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mobject_ids\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\u001b[0m in \u001b[0;36msubmit_task\u001b[0;34m(self, function_descriptor, args, actor_id, actor_handle_id, actor_counter, actor_creation_id, actor_creation_dummy_object_id, max_actor_reconstructions, execution_dependencies, new_actor_handles, num_return_vals, resources, placement_resources, driver_id)\u001b[0m\n\u001b[1;32m    626\u001b[0m                     \u001b[0margs_for_local_scheduler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    627\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 628\u001b[0;31m                     \u001b[0margs_for_local_scheduler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    629\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    630\u001b[0m             \u001b[0;31m# By default, there are no execution dependencies.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\u001b[0m in \u001b[0;36mput\u001b[0;34m(value)\u001b[0m\n\u001b[1;32m   2337\u001b[0m             \u001b[0mworker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtask_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mput_index\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2338\u001b[0m         )\n\u001b[0;32m-> 2339\u001b[0;31m         \u001b[0mworker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mput_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2340\u001b[0m         \u001b[0mworker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtask_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mput_index\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2341\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mobject_id\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\u001b[0m in \u001b[0;36mput_object\u001b[0;34m(self, object_id, value)\u001b[0m\n\u001b[1;32m    369\u001b[0m         \u001b[0;31m# Serialize and put the object in the object store.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    370\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 371\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstore_and_register\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    372\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mpyarrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPlasmaObjectExists\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    373\u001b[0m             \u001b[0;31m# The object already exists in the object store, so there is no\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/worker.py\u001b[0m in \u001b[0;36mstore_and_register\u001b[0;34m(self, object_id, value, depth)\u001b[0m\n\u001b[1;32m    303\u001b[0m                         \u001b[0mmemcopy_threads\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmemcopy_threads\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    304\u001b[0m                         serialization_context=self.get_serialization_context(\n\u001b[0;32m--> 305\u001b[0;31m                             self.task_driver_id))\n\u001b[0m\u001b[1;32m    306\u001b[0m                 \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    307\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mpyarrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSerializationCallbackError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/utils.py\u001b[0m in \u001b[0;36m_wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    473\u001b[0m                 \u001b[0;32mdef\u001b[0m \u001b[0m_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    474\u001b[0m                     \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 475\u001b[0;31m                         \u001b[0;32mreturn\u001b[0m \u001b[0morig_attr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    476\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    477\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wrapper_cache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_wrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/_plasma.pyx\u001b[0m in \u001b[0;36mpyarrow._plasma.PlasmaClient.put\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/_plasma.pyx\u001b[0m in \u001b[0;36mpyarrow._plasma.PlasmaClient.create\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/anaconda3/envs/cs189/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/error.pxi\u001b[0m in \u001b[0;36mpyarrow.lib.check_status\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mArrowIOError\u001b[0m: Broken pipe"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "train_preds = rf.predict(x_train,\"train\")\n",
    "print(\"training accuracy:\",np.mean(train_preds==y_train))\n",
    "val_preds = rf.predict(x_val,\"val\")\n",
    "print(\"validation accuracy:\",np.mean(val_preds==y_val))\n",
    "rf.predict(x_test,\"test\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
